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FIG. 2 



for (i = 0; i < 128; ) {
dpref(&a[i+32]);
for (j = 0; j < 32; j++, i++) {
x += a[i];

}

}

i 
i 
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FIG. 4 
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FIG. 5 
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FIG. 7 



Q 



START 

(COPY-TYPE INNER LOOP SPLITTING) 





i , — ' 


POST-SUBDIVIDING INNER LOOP COUNT 
= DT/NUMBER OF SPLITS 


N 




COPY INNER LOOP CORRESPONDING TO 
NUMBER OF SPLITS 


\ 


f s — ' 


MODIFY EACH INNER LOOP COUNT AFTER 
SUBDIVIDING TO POST-SUBDIVIDING 
INNER LOOP COUNT 


> 


/ ^ 


ADD REMAINDER LEFT OVER AFTER 
(DT/NUMBER OF SPLITS) TO LOOP COUNT 
OF POST-SUBDIVIDING HEAD LOOP 


\ 


( 



END 



(COPY-TYPE INNER LOOP SPLITTING) 



S41 
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FIG. 8 



c 



START 

(CONDITION-TYPE INNER LOOP SPLITTING) 



> 




POST-SUBDIVIDING INNER LOOP COUNT 
= DT/NUMBER OF SPLITS


N 


i , — ' 


GENERATE INNER LOOP CONDITION SWITCH TABLE | 


> 


i , " 


MODIFY EACH INNER LOOP COUNT 
CONDITION AFTER SUBDIVIDING TO 
POST-SUBDIVIDING INNER LOOP COUNT 


> 


f ^— " : 


ADD REMAINDER LEFT OVER AFTER (DT/NUMBER 
OF SPLITS) TO LOOP COUNT CONDITION OF 
POST-SUBDIVIDING HEAD LOOP 


\ 


f 
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END 

(CONDITION-TYPE INNER LOOP SPLITTING) 
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FIG. 10 



(START (INSTRUCTION INSERTION)) 



LOOP B 

REPEAT UNTIL INFORMATION 
LIST BECOMES EMPTY 




i) 



NO 



YES 



YES 



,S74 



INSERT INSTRUCTION 
FOR PREFETCHING DATA 
ONE LINE AHEAD 




INSERT INSTRUCTION 
FOR PREFETCHING DATA 
TWO LINES AHEAD 



DELETE ANALYZED 
INFORMATION FROM 
INFORMATION LIST 



,S76 



LOOP B 



3 



(END (INSTRUCTION INSERTION)) 
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FIG. 11 



PEELING IS UNNECESSARY 



(a) A IS OF FOUR-BYTE TYPE. 282



(c) 



for(i=0;i<128;){ 

for(j=0;j<32;j++,i++){ 
sum+=A[i]; 

} 

}



INSTRUCTION 
INSERTION 



for(i=0;i<128;){ 
dpref(&A[i+32]); 

for(j=0;j<32;j++,i++){ 
sum+=A[i]; 

}

}



for(i=0;i<128;i++){
sum+=A[i];
}








STRUCTURE f 




) -"^ 


TRANSFORMATION L 





286 
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FIG. 12 



INPUT PROGRAM SOURCE IN C LANGUAGE 

.240 

int A[1000];
int main(void)
{
int i;

int sum = 0;

for ( i=0; i<128;i++) {
sum += A[ i ];

}

return sum;
}
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FIG. 24 



(a) A IS OF FOUR-BYTE TYPE 



for(i=0;i<N;i++){ 
sum+=A[i]; 
sum+=A[i+1];
sum+=A[i+2]; 

-SKIP— 
sum+=A[i+30]; 
sum+=A[i+31]; 



(b) 



INSTRUCTION 
INSERTION 



for(i=0;i<N;i++){ 
dpref(&A[i+32]); 
sum+=A[i]; 
sum+=A[i+1];
sum+=A[i+2]; 

-SKIP— 
sum+=A[i+30]; 
sum+=A[i+31];



371 



WHEN IT IS JUDGED 
THAT LOOP STRUCTURE 
TRANSFORMATION 
IS UNNECESSARY, 
INSTRUCTION IS 
INSERTED WITHOUT 
STRUCTURE 
TRANSFORMATION. 



372 
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FIG. 25 



A IS OF FOUR- BYTE TYPE 



381 



for(i=0;i<128;i++){ 
sum+=A[i]; 

}




INSTRUCTION INSERTION 
(TWO LINES AHEAD) 
AFTER NORMAL 
TRANSFORMATION 



dpref(&A[0]); 
dpref(&A[32]); 

■ 
■ 

for(i=0;i<128;){ 
dpref(&A[i+64]); 

for(j=0;j<32;j++,i++){
sum+=A[i];

}

}



SAME TRANSFORMATION 
AS IN THE CASE WHERE 
ELEMENTS ARE ALIGNED 



382 
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FIG. 26 



(a) 



(b) 



391 



for(i=0;i<140;i++){ 
sum+=A[i]; 

}




INSTRUCTION INSERTION 
(TWO LINES AHEAD) 
AFTER NORMAL 
TRANSFORMATION 



dpref(&A[0]); 
dpref(&A[32]); 

■ 
■ 

for(i=0;i<140;){
dpref(&A[i+64]);

if(i>=128)n=140-128;
else n=32;

for(j=0;j<n;j++,i++){
sum+=A[i];
}
}



■SAME TRANSFORMATION 
AS IN THE CASE WHERE 
ELEMENTS ARE ALIGNED 

.392 
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FIG. 28 



431 





A[0] 


m m m 


A[n-1] 


A[n] 


• • • 


A[n+31] 


A[n+32] 


• • • 
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432 
433 



28/34 




29/34 




30/34 



FIG. 31 



int b[128];

#pragma _loop_tiling_dpref b

for (i=0; i<128; i++)

{

a[i] = b[i];

}



(b) for (i=0; i<128; ) {

dpref(&b[i+32]);
for (j=0; j<32; j++, i++) {
a[i] = b[i];

}

}
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FIG. 32 



A IS OF FOUR-BYTE TYPE 502

for(i=0;i<128;i++) { 
A[i] = val * i ; 

} 




for(i=0;i<128;) { 

for(j = 0;j<32;j++,i++) { 



A[i] = val * i; 

} 

}




for(i=0;i<128; ) { 
PreTouch(&A[i]); 
for(j=0;j<32;j++,i++) { 



A[i] = val * i; 

}
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